R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Read each CSV input into its own data frame. All paths are relative to the
# working directory, so the files must be present under data/ when this runs.
countries_aggregated <- read.csv(file = "data/countries-aggregated.csv")
key_countries_pivoted <- read.csv(file = "data/key-countries-pivoted.csv")
reference <- read.csv(file = "data/reference.csv")
time_series_19_covid_combined <- read.csv(
  file = "data/time-series-19-covid-combined.csv"
)
us_confirmed <- read.csv(file = "data/us_confirmed.csv")
us_deaths <- read.csv(file = "data/us_deaths.csv")
worldwide_aggregated <- read.csv(file = "data/worldwide-aggregated.csv")
world_cities <- read.csv(file = "data/worldcities.csv")
Statewide <- read.csv(file = "data/Statewide (1).csv")
library(ggplot2)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.3     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(leaflet)
library(sp)
library(flexdashboard)
library(broom)
library(modelr)
## 
## Attaching package: 'modelr'
## The following object is masked from 'package:broom':
## 
##     bootstrap
# One row per Province.State: the mean of Lat and Long over that state's rows
# and the sum of its Case values.
# NOTE(review): the printed means for some states (e.g. Connecticut, Delaware)
# look pulled toward 0 -- presumably some rows carry placeholder (0, 0)
# coordinates; confirm against the input file.
# NOTE(review): if Case holds cumulative counts per date, sum() over all rows
# over-counts -- TODO confirm the schema of us_confirmed.
us_confirmed_updated <- summarise(
  group_by(us_confirmed, Province.State),
  Lat = mean(Lat),
  Long = mean(Long),
  total_cases = sum(Case)
)
## `summarise()` ungrouping output (override with `.groups` argument)
us_confirmed_updated
## # A tibble: 58 x 4
##    Province.State     Lat   Long total_cases
##    <chr>            <dbl>  <dbl>       <int>
##  1 Alabama           31.9  -84.2     2024563
##  2 Alaska            56.4 -138.        60536
##  3 American Samoa   -14.3 -170.            0
##  4 Arizona           29.7  -98.3     3474676
##  5 Arkansas          34.0  -90.0      993778
##  6 California        36.6 -117.     12656799
##  7 Colorado          37.8 -102.      2361609
##  8 Connecticut       33.3  -58.1     3659874
##  9 Delaware          23.5  -45.3      797079
## 10 Diamond Princess   0      0          5923
## # … with 48 more rows
  # Interactive map with one marker per Province.State, placed at the averaged
  # coordinates computed in us_confirmed_updated.
  us_confirmed_updated %>%
    leaflet() %>%
    addProviderTiles("OpenStreetMap.Mapnik") %>%
    addMarkers(
      lng = ~Long,
      lat = ~Lat,
      # The popup text is HTML. The previous string emitted "</h3>" closing
      # tags without any opening "<h3>", which is malformed markup; the state
      # name is now wrapped in a properly opened and closed heading.
      popup = ~paste0(
        "<h3>State: ", Province.State, "</h3>",
        "Cases: ", total_cases
      )
    )
# Interactive bar chart of total_cases for every Province.State.
ggplotly(
  ggplot(us_confirmed_updated) +
    geom_col(mapping = aes(x = Province.State, y = total_cases)) +
    # Rotate the x-axis labels like the other per-state bar charts in this
    # document do; with one bar per state, horizontal labels overlap.
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Interactive bar chart of Effective.Date per State from the Statewide table,
# with the x-axis labels rotated to vertical.
# NOTE(review): Effective.Date is mapped to bar height; verify that column's
# type is meaningful as a y value.
ggplotly(
  ggplot(Statewide) +
    geom_col(mapping = aes(x = State, y = Effective.Date)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Total Population per Province.State from the reference table.
# drop_na() with no arguments removes every row that has an NA in any column
# before the totals are computed.
# NOTE(review): the totals printed further down look roughly double the real
# state populations -- presumably reference holds both state-level and
# county-level rows; confirm against the input file.
reference_complete <- drop_na(reference)
reference_updated <- summarise(
  group_by(reference_complete, Province.State),
  total_pop = sum(Population)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Interactive bar chart of total_pop per Province.State, with the x-axis
# labels rotated to vertical.
ggplotly(
  ggplot(reference_updated) +
    geom_col(mapping = aes(x = Province.State, y = total_pop)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Sum of Case per Province.State, without the coordinate columns, so the
# result carries only the join key and the case total.
us_confirmed_updated1 <- summarise(
  group_by(us_confirmed, Province.State),
  total_cases = sum(Case)
)
## `summarise()` ungrouping output (override with `.groups` argument)
library(dplyr)
# Combine population and case totals. The inner join keeps only the
# Province.State values that appear in both tables.
us_confirmed_pop <- inner_join(
  reference_updated,
  us_confirmed_updated1,
  by = "Province.State"
)
us_confirmed_pop
## # A tibble: 56 x 3
##    Province.State       total_pop total_cases
##    <chr>                    <int>       <int>
##  1 Alabama                9806370     2024563
##  2 Alaska                 1463090       60536
##  3 American Samoa           55641           0
##  4 Arizona               14557434     3474676
##  5 Arkansas               6035608      993778
##  6 California            79024446    12656799
##  7 Colorado              11517472     2361609
##  8 Connecticut            7130574     3659874
##  9 Delaware               1947528      797079
## 10 District of Columbia   1411498      740559
## # … with 46 more rows
# Linear regression with total_pop as the response and total_cases as the
# single predictor, followed by a one-row summary of the fit.
confirmed_pop_model <- lm(total_pop ~ total_cases, data = us_confirmed_pop)
glance(confirmed_pop_model)
## # A tibble: 1 x 12
##   r.squared adj.r.squared  sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl>  <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.487         0.477 1.06e7      51.2 2.29e-9     1  -985. 1975. 1981.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# Attach the model's predictions (column pred) and then its residuals
# (column resid) to the joined table, and print the result.
confirmed_pop_df <- add_residuals(
  add_predictions(us_confirmed_pop, confirmed_pop_model),
  confirmed_pop_model
)
confirmed_pop_df
## # A tibble: 56 x 5
##    Province.State       total_pop total_cases      pred     resid
##    <chr>                    <int>       <int>     <dbl>     <dbl>
##  1 Alabama                9806370     2024563  9733350.    73020.
##  2 Alaska                 1463090       60536  5988150. -4525060.
##  3 American Samoa           55641           0  5872714. -5817073.
##  4 Arizona               14557434     3474676 12498568.  2058866.
##  5 Arkansas               6035608      993778  7767748. -1732140.
##  6 California            79024446    12656799 30007940. 49016506.
##  7 Colorado              11517472     2361609 10376062.  1141410.
##  8 Connecticut            7130574     3659874 12851721. -5721147.
##  9 Delaware               1947528      797079  7392663. -5445135.
## 10 District of Columbia   1411498      740559  7284885. -5873387.
## # … with 46 more rows
# Predicted vs. observed response. The model was fit as
# total_pop ~ total_cases, so pred is a prediction of total_pop and must be
# compared with total_pop (not total_cases) for the y = x reference line to
# mean "perfect prediction".
ggplot(confirmed_pop_df) +
  geom_point(mapping = aes(x = pred, y = total_pop)) +
  geom_abline(slope = 1, intercept = 0, color = "red")

# Observed data with the fitted regression line. The model's intercept and
# slope describe total_pop as a function of total_cases, so the points must be
# drawn in that same space (x = total_cases, y = total_pop) for the red line
# to be the model fit.
ggplot(confirmed_pop_df) +
  geom_point(mapping = aes(x = total_cases, y = total_pop)) +
  geom_abline(
    # Coefficients are looked up by name rather than by position.
    slope = coef(confirmed_pop_model)[["total_cases"]],
    intercept = coef(confirmed_pop_model)[["(Intercept)"]],
    color = "red"
  )

# Distribution of the model residuals, drawn as a 30-bin histogram.
ggplot(confirmed_pop_df) +
  geom_histogram(mapping = aes(x = resid), bins = 30)